
####################################################################

#Functions for Zero Hunger analysis

####################################################################
####################################################################


#Libraries used

library(AER)
library(pscl)
library(bbmle)
library(car)
library(CBPS)
library("plyr")
library("ggplot2")
library("grid")
library("gridExtra")
library(reshape2)
library(dplyr)
library(weights)
library(robustbase)
library(fitdistrplus)
library(gamlss)
library(AER)
library(rgdal)
library(sp)
library(rgeos)
library(spdep)
library(maptools)
library(RColorBrewer)
library(classInt)
library("xlsx")

####################################################################################################
####################################################################################################

#Source other scripts from specific lines
sourcePartial <- function(fn,startTag='#from here',endTag='#to here') {
  lines <- scan(fn, what=character(), sep="\n", quiet=TRUE)
  #st<-grep(startTag,lines)
  #en<-grep(endTag,lines)
  st<-which(lines==startTag)
  en<-which(lines==endTag)
  tc <- textConnection(lines[(st+1):(en-1)])
  source(tc)
  close(tc)
}

#Loads in an object and renames it
loadRData <- function(fileName){
  #loads an RData file, and returns it
  load(fileName)
  get(ls()[ls() != "fileName"])
}

####################################################################################################
###################################################################################################

#Functions needed to make the samples for each model

####################################################################################################
###################################################################################################

#Find variables with zero
NrOfZero <- function(df,var){
  result=NULL
  {
    Number <- length(na.omit(which(df[,var]==0)))
    
    result=Number
  }
}

NrOfZeroALL <- function(df){
  result=NULL
  {
    ALLNames <- names(df)
    test <- lapply(ALLNames, function(y) NrOfZero(df,y))
    TheTableRC <- data.frame(do.call("rbind",test))
    TheTableRC$Vars <- ALLNames
    
    TheTableRC <- subset(TheTableRC, select=c(2,1))
    TheTableRC <- TheTableRC [order(TheTableRC[,2]),]
    names(TheTableRC)[2] <- "Zeros"
    
    TheTableRC <- subset(TheTableRC,Zeros>0)
    TheTableRC$No <- 1:length(TheTableRC$Vars)
    TheTableRC <- subset(TheTableRC, select=c(3,1:2))
    
    TheTableRC$MPI <- grepl(pattern="MPI",TheTableRC[,"Vars"])
    MPIs <- TheTableRC[TheTableRC$MPI == "TRUE", ]
    Other <- TheTableRC[TheTableRC$MPI=="FALSE",]
    
    MPIvars <- as.vector(MPIs[,"Vars"])
    Othervars <- as.vector(Other[,"Vars"])
    
    result=list(MPIvars,Othervars)
  }
}

#get the constant - half of the minimum non-zero value - and add to the variables with zero
AddConstant <- function(df,var,addName){
  result=NULL
  {
    NewDF<- subset(df,select=c(var))
    
    MinValue <- min(NewDF[,var][NewDF[,var]>0],na.rm=T)
    MinValueFifty <- MinValue/2
    
    NewDF[,var] <- ifelse(is.na(NewDF[,var]),NA,NewDF[,var]+MinValueFifty)
    names(NewDF)[1] <- paste(names(NewDF)[1],addName,sep="")#addName
    
    result=NewDF
  }
}

#Make the geometric mean
gmean <- function(x){
  # geometric mean calculation
  prodx <- prod(x)
  n   <- length(x)
  gm <- prodx**(1/n)
  gm
}

#Make MPI for 2000 and 2010, using the geometric means
MakeMPI00 <- function(df){
  result=NULL
  {
    #Health
    df$MPIHealth2000for2000Analysis <- apply(df[,c("MPIHealthLifeExpectancyDepr2000for2000Analysis","MPIHealthInfantMortality2000for2000Analysis")], 1, gmean)
    df$MPIHealth2010for2000Analysis <- apply(df[,c("MPIHealthLifeExpectancyDepr2010for2000Analysis","MPIHealthInfantMortality2010for2000Analysis")], 1, gmean)
    
    #Education is done
    
    #Living standard
    #Geometric mean for all of them - making an "no assets" variable first
    df$MPILivingStandardAssets2000for2000Analysis <- apply(df[,c("MPILivingStandardTV2000for2000Analysis",
                                                                 "MPILivingStandardRadio2000for2000Analysis",
                                                                 "MPILivingStandardTelephone2000for2000Analysis",
                                                                 "MPILivingStandardFridgeFreez2000for2000Analysis",
                                                                 "MPILivingStandardWashingMachine2000for2000Analysis",
                                                                 "MPILivingStandardCar2000for2000Analysis")], 1, gmean)
    df$MPILivingStandardAssets2010for2000Analysis <- apply(df[,c("MPILivingStandardTV2010for2000Analysis",
                                                                 "MPILivingStandardRadio2010for2000Analysis",
                                                                 "MPILivingStandardTelephone2010for2000Analysis",
                                                                 "MPILivingStandardFridgeFreez2010for2000Analysis",
                                                                 "MPILivingStandardWashingMachine2010for2000Analysis",
                                                                 "MPILivingStandardCar2010for2000Analysis")], 1, gmean)
    #Its only the vars that go into the assets that might have had a constant added (so more decimals), never assets
    #so I should keep the 4 decimals here
    df[,c("MPILivingStandardAssets2000for2000Analysis",
          "MPILivingStandardAssets2010for2000Analysis")] <- round(df[,c("MPILivingStandardAssets2000for2000Analysis",
                                                                        "MPILivingStandardAssets2010for2000Analysis")],digits=4)
    
    df$MPILivingStandard2000for2000Analysis <- apply(df[,c("MPILivingStandardAssets2000for2000Analysis",
                                                           "MPILivingStandardWaterPublicNetwork2000for2000Analysis",
                                                           "MPILivingStandardSewagePublicSystemORSeptTank2000for2000Analysis",
                                                           "MPILivingStandardElectricity2000for2000Analysis")], 1, gmean)
    
    df$MPILivingStandard2010for2000Analysis <- apply(df[,c("MPILivingStandardAssets2010for2000Analysis",
                                                           "MPILivingStandardWaterPublicNetwork2010for2000Analysis",
                                                           "MPILivingStandardSewagePublicSystemORSeptTank2010for2000Analysis",
                                                           "MPILivingStandardElectricity2010for2000Analysis")], 1, gmean)
    
    df[,c("MPIHealth2000for2000Analysis",
          "MPIHealth2010for2000Analysis",
          "MPILivingStandard2000for2000Analysis",
          "MPILivingStandard2010for2000Analysis")] <- round(df[,c("MPIHealth2000for2000Analysis",
                                                                  "MPIHealth2010for2000Analysis",
                                                                  "MPILivingStandard2000for2000Analysis",
                                                                  "MPILivingStandard2010for2000Analysis")],digits=4)
    
    #Full MPI 2000 and 2010 
    df$MPI2000for2000Analysis <- apply(df[,c("MPIHealth2000for2000Analysis",
                                             "MPIEducation2000for2000Analysis",
                                             "MPILivingStandard2000for2000Analysis")], 1, gmean)
    
    df$MPI2010for2000Analysis <- apply(df[,c("MPIHealth2010for2000Analysis",
                                             "MPIEducation2010for2000Analysis",
                                             "MPILivingStandard2010for2000Analysis")], 1, gmean)
    
    df[,c("MPI2000for2000Analysis","MPI2010for2000Analysis")] <- round(df[,c("MPI2000for2000Analysis","MPI2010for2000Analysis")],digits=4)
    
    result=df
  }
}

#Make MPI for 2000=4 and 2013, using the geometric means
MakeMPI04 <- function(df){
  result=NULL
  {
    #First make child malnutrition
    df$MPIHealthChildMalnutrition2004for2004Analysis <- apply(df[,c("MPIHealthBirthMalnutrition2004for2004Analysis","MPIHealthOneTwoMalnutrition2004for2004Analysis")], 1, gmean)
    df$MPIHealthChildMalnutrition2013for2004Analysis <- apply(df[,c("MPIHealthBirthMalnutrition2013for2004Analysis","MPIHealthOneTwoMalnutrition2013for2004Analysis")], 1, gmean)
    
    #I make into 4 decimals to be the same as the other sub-vars Im using
    #And this one will never have a constant added onto it, that is the previous stage
    df[,c("MPIHealthChildMalnutrition2004for2004Analysis",
          "MPIHealthChildMalnutrition2013for2004Analysis")] <- round(df[,c("MPIHealthChildMalnutrition2004for2004Analysis",
                                                                           "MPIHealthChildMalnutrition2013for2004Analysis")],digits=4)
    
    #Health
    df$MPIHealth2004for2004Analysis <- apply(df[,c("MPIHealthInfantMortality2004for2004Analysis","MPIHealthChildMalnutrition2004for2004Analysis")], 1, gmean)
    df$MPIHealth2013for2004Analysis <- apply(df[,c("MPIHealthInfantMortality2013for2004Analysis","MPIHealthChildMalnutrition2013for2004Analysis")], 1, gmean)
    
    #Education already done
    
    #Living standard
    df$MPILivingStandard2004for2004Analysis <- apply(df[,c("MPILivingStandardWaterSystem2004for2004Analysis",
                                                           "MPILivingStandardPublicSystemSewage2004for2004Analysis",
                                                           "MPILivingStandardElectricity2004for2004Analysis",
                                                           "MPILivingStandardInadequateWalls2004for2004Analysis")], 1, gmean)
    
    df$MPILivingStandard2013for2004Analysis <- apply(df[,c("MPILivingStandardWaterSystem2013for2004Analysis",
                                                           "MPILivingStandardPublicSystemSewage2013for2004Analysis",
                                                           "MPILivingStandardElectricity2013for2004Analysis",
                                                           "MPILivingStandardInadequateWalls2013for2004Analysis")], 1, gmean)
    #Before putting together get them into same decimals
    #So that its all equal. And this stage will never have constant added onto it
    df[,c("MPIHealth2004for2004Analysis",
          "MPIHealth2013for2004Analysis",
          "MPILivingStandard2004for2004Analysis",
          "MPILivingStandard2013for2004Analysis")] <- round(df[,c("MPIHealth2004for2004Analysis",
                                                                  "MPIHealth2013for2004Analysis",
                                                                  "MPILivingStandard2004for2004Analysis",
                                                                  "MPILivingStandard2013for2004Analysis")],digits=4)
    #Then put them all together
    df$MPI2004for2004Analysis <- apply(df[,c("MPIHealth2004for2004Analysis", 
                                             "MPIEducation2004for2004Analysis",
                                             "MPILivingStandard2004for2004Analysis")], 1, gmean)
    df$MPI2013for2004Analysis <- apply(df[,c("MPIHealth2004for2004Analysis", 
                                             "MPIEducation2013for2004Analysis",
                                             "MPILivingStandard2013for2004Analysis")], 1, gmean)
    df[,c("MPI2004for2004Analysis",
          "MPI2013for2004Analysis")] <- round(df[,c("MPI2004for2004Analysis",
                                                    "MPI2013for2004Analysis")],digits=4)
    result=df
    
  }
}

#Set level order by frequency of levels
setRefToMostCommonLevel <- function(f) { 
  f <- as.factor(f) 
  t <- table(f)
  relevel(f,ref=as.integer(which(t>=max(t))[[1]]))
}

#Create and scale logged treatment variables
getDataFrame <- function(df,TypeContrast, contrastVar1="state", contrastVar2="biome",MaxRefLevFunction,
                         BaseVar="ZHvariable",MainName="Lin") {
  {
    result=NULL
  }
  df <- na.omit(df)#take away empties
  
  df[,contrastVar1] <- droplevels(df[,contrastVar1])#fix categorical variable 1
  contrasts(df[,contrastVar1]) <- TypeContrast
  df[,contrastVar1] <- MaxRefLevFunction(df[,contrastVar1])
  
  df[,contrastVar2] <- droplevels(df[,contrastVar2])#fix categorical variable 2
  contrasts(df[,contrastVar2]) <- TypeContrast
  df[,contrastVar2] <- MaxRefLevFunction(df[,contrastVar2])
  
  df$column = scale(df[,BaseVar])#make scaled linear
  
  colnames(df)[colnames(df)=="column"] <- MainName#name as I wish linear
  
  result=df
}

####################################################################################################
###################################################################################################

#Functions needed to create CBGPS weights and run multivariate regression models

####################################################################################################
###################################################################################################

#Create CBGPS weights

#Parametric weights
GetCBGPSfit2 <- function(namefolders,script,start1,end1,yearsample,timeperiod,progr,biomvar,namefolderssave,fitnamesave){
  result=NULL
  {
    #Get the sample that you need - based on individual scripts
    #get directory first - same I will use to save the new file
    script <- paste(namefolders,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    getnames <- paste(cbpsDepVar,cbpsbaselineDepVar,sep="_")
    
    #1. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #Include here that it has to be the same sample as rural!
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Print some useful information, and gather key sample information to be saved in a df at the end
    print(length(RuralPoverty04quality[,1]))
    getN <- length(RuralPoverty04quality[,1])
    elecvar <- length(names(RuralPoverty04quality))
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    getNAs <- length(which(is.na(RuralPoverty04quality[,elecvar])))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    getnewN <- length(RuralPoverty04quality[,1])
    
    #2. Get the reference levels (want this to be the ones with the least observations)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    smallestSnr <- onetabS[1,2]
    ndsmallestS <- onetabS[2,1]
    ndsmallestSnr <- onetabS[2,2]
    thrdsmallestS <- onetabS[3,1]
    thrdsmallestSnr <- onetabS[3,2]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    smallestBnr <- onetabB[1,2]
    ndsmallestB <- onetabB[2,1]
    ndsmallestBnr <- onetabB[2,2]
    thrdsmallestB <- onetabB[3,1]
    thrdsmallestBnr <- onetabB[3,2]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    #Then can compare the reference from contrast with the table of smallest to make sure the right one is selected
    
    checkcontrState <- data.frame(contrasts(RuralPoverty04quality[,"stateName"]))
    checkcontrState$Var <- row.names(checkcontrState)
    checkcontrStateRef <- checkcontrState[which(checkcontrState[,1] < 0),]
    checkcontrStateRef <- checkcontrStateRef$Var
    checkcontrStateRefOrig <- checkcontrStateRef
    
    checkcontrBiome <- data.frame(contrasts(RuralPoverty04quality[,biomvar]))
    checkcontrBiome$Var <- row.names(checkcontrBiome)
    checkcontrBiomeRef <- checkcontrBiome[which(checkcontrBiome[,1] < 0),]
    checkcontrBiomeRef <- checkcontrBiomeRef$Var
    checkcontrBiomeRefOrig <- checkcontrBiomeRef
    
    #3.Make cbgps formula, adding on the election variable
    my.formula <- as.formula(reformulate(termlabels=c(cbpsbaselineDepVar,cbpsVars),response=cbpsDepVar))
    addTerm <- paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))
    newmodformula <- update(my.formula,paste("~. +", addTerm))
    
    #4. Make the CBPS weight
    fit2 <- CBPS(newmodformula, data=RuralPoverty04quality)
    
    #5. Save the weights in respective fold
    save(fit2, file = paste(namefolderssave,fitnamesave,sep="",paste(".rda",sep="")))
    
    #6. Get out a log for the weight (sample size and reference)
    tog <- cbind(getnames,getN,getNAs,getnewN,checkcontrStateRefOrig,smallestS,smallestSnr,ndsmallestS,ndsmallestSnr,thrdsmallestS,thrdsmallestSnr,
                 checkcontrBiomeRefOrig,smallestB,smallestBnr,ndsmallestB,ndsmallestBnr,thrdsmallestB,thrdsmallestBnr)
    
    result <- tog
  }
}

#Parametric weights where model influential points are first deleted and then weights created
GetCBGPSfit2new <- function(namefolders,script,start1,end1,yearsample,timeperiod,progr,inffunc,namefolderssave,modlist,nrmod,biomvar,fitnamesave){
  result=NULL
  {
    #Read in the sample
    script <- paste(namefolders,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    getnames <- paste(cbpsDepVar,cbpsbaselineDepVar,sep="_")
    
    #1. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #it has to be the same sample as rural
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Get relevant sample information
    print(length(RuralPoverty04quality[,1]))
    getN <- length(RuralPoverty04quality[,1])
    elecvar <- length(names(RuralPoverty04quality))
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    getNAs <- length(which(is.na(RuralPoverty04quality[,elecvar])))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    getnewN <- length(RuralPoverty04quality[,1])
    
    #When Negative binomial/Quasi poisson model selection function that excludes influential points
    #based on a full model. When robust OLS pick an "empty" function that just returns the same df as put in
    RuralPoverty04quality <- inffunc(RuralPoverty04quality,namefolderssave,modlist,nrmod)
    
    getnewNnoInf <- length(RuralPoverty04quality[,1])
    
    #2. Get the reference levels (want this to be the ones with lowest)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    smallestSnr <- onetabS[1,2]
    ndsmallestS <- onetabS[2,1]
    ndsmallestSnr <- onetabS[2,2]
    thrdsmallestS <- onetabS[3,1]
    thrdsmallestSnr <- onetabS[3,2]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    smallestBnr <- onetabB[1,2]
    ndsmallestB <- onetabB[2,1]
    ndsmallestBnr <- onetabB[2,2]
    thrdsmallestB <- onetabB[3,1]
    thrdsmallestBnr <- onetabB[3,2]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    #compare the reference from contrast with the table of smallest to make sure I have the right one!
    checkcontrState <- data.frame(contrasts(RuralPoverty04quality[,"stateName"]))
    checkcontrState$Var <- row.names(checkcontrState)
    checkcontrStateRef <- checkcontrState[which(checkcontrState[,1] < 0),]
    checkcontrStateRef <- checkcontrStateRef$Var
    checkcontrStateRefOrig <- checkcontrStateRef
    
    checkcontrBiome <- data.frame(contrasts(RuralPoverty04quality[,biomvar]))
    checkcontrBiome$Var <- row.names(checkcontrBiome)
    checkcontrBiomeRef <- checkcontrBiome[which(checkcontrBiome[,1] < 0),]
    checkcontrBiomeRef <- checkcontrBiomeRef$Var
    checkcontrBiomeRefOrig <- checkcontrBiomeRef
    
    #3.Make cbps formula and add election variable
    my.formula <- as.formula(reformulate(termlabels=c(cbpsbaselineDepVar,cbpsVars),response=cbpsDepVar))
    addTerm <- paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))
    newmodformula <- update(my.formula,paste("~. +", addTerm))#makes a new formula with the added variable
    
    #4. Make the CBPS weight
    fit2 <- CBPS(newmodformula, data=RuralPoverty04quality)
    
    #5. Save the weights
    save(fit2, file = paste(namefolderssave,fitnamesave,sep="",paste(".rda",sep="")))
    
    #6. Get out a log for the weight (sample size and reference)
    tog <- cbind(getnames,getN,getNAs,getnewN,getnewNnoInf,checkcontrStateRefOrig,smallestS,smallestSnr,ndsmallestS,ndsmallestSnr,thrdsmallestS,
                 thrdsmallestSnr,checkcontrBiomeRefOrig,smallestB,smallestBnr,ndsmallestB,ndsmallestBnr,thrdsmallestB,thrdsmallestBnr)
    
    result <- tog
  }
}

#Non-Parametric weights
GetCBGPSfit3 <- function(namefolders,script,start1,end1,yearsample,timeperiod,progr,biomvar,namefolderssave,fitnamesave){
  result=NULL
  {
    #Get the sample that you need - based on individual scripts
    #get directory first - same I will use to save the new file
    script <- paste(namefolders,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    getnames <- paste(cbpsDepVar,cbpsbaselineDepVar,sep="_")
    
    #1. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #it has to be the same sample as rural sample
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Print and save key information to be saved in log df below
    print(length(RuralPoverty04quality[,1]))
    getN <- length(RuralPoverty04quality[,1])
    elecvar <- length(names(RuralPoverty04quality))
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    getNAs <- length(which(is.na(RuralPoverty04quality[,elecvar])))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    getnewN <- length(RuralPoverty04quality[,1])
    
    #2. Get the reference levels (want this to be the ones with fewest observations)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    smallestSnr <- onetabS[1,2]
    ndsmallestS <- onetabS[2,1]
    ndsmallestSnr <- onetabS[2,2]
    thrdsmallestS <- onetabS[3,1]
    thrdsmallestSnr <- onetabS[3,2]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    smallestBnr <- onetabB[1,2]
    ndsmallestB <- onetabB[2,1]
    ndsmallestBnr <- onetabB[2,2]
    thrdsmallestB <- onetabB[3,1]
    thrdsmallestBnr <- onetabB[3,2]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    #Then can compare the reference from contrast with the table of smallest to make sure the right one is chosen
    checkcontrState <- data.frame(contrasts(RuralPoverty04quality[,"stateName"]))
    checkcontrState$Var <- row.names(checkcontrState)
    checkcontrStateRef <- checkcontrState[which(checkcontrState[,1] < 0),]
    checkcontrStateRef <- checkcontrStateRef$Var
    checkcontrStateRefOrig <- checkcontrStateRef
    
    checkcontrBiome <- data.frame(contrasts(RuralPoverty04quality[,biomvar]))
    checkcontrBiome$Var <- row.names(checkcontrBiome)
    checkcontrBiomeRef <- checkcontrBiome[which(checkcontrBiome[,1] < 0),]
    checkcontrBiomeRef <- checkcontrBiomeRef$Var
    checkcontrBiomeRefOrig <- checkcontrBiomeRef
    
    #3.Make cbps formula that includes the election variable
    my.formula <- as.formula(reformulate(termlabels=c(cbpsbaselineDepVar,cbpsVars),response=cbpsDepVar))
    addTerm <- paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))
    newmodformula <- update(my.formula,paste("~. +", addTerm))
    
    #4. Make the CBPS weight
    fit3 <- npCBPS(newmodformula, data=RuralPoverty04quality)
    
    #5. Save it
    save(fit3, file = paste(namefolderssave,fitnamesave,sep="",paste(".rda",sep="")))
    
    #6. Get out a log for the weight (sample size and reference)
    tog <- cbind(getnames,getN,getNAs,getnewN,checkcontrStateRefOrig,smallestS,smallestSnr,ndsmallestS,ndsmallestSnr,thrdsmallestS,thrdsmallestSnr,
                 checkcontrBiomeRefOrig,smallestB,smallestBnr,ndsmallestB,ndsmallestBnr,thrdsmallestB,thrdsmallestBnr)
    
    result <- tog
  }
}

#Non-Parametric weights where model influential points are first deleted and then weights created
GetCBGPSfit3new <- function(namefolders,script,start1,end1,yearsample,timeperiod,progr,inffunc,namefolderssave,modlist,nrmod,biomvar,fitnamesave){
  result=NULL
  {
    #Get sample 
    script <- paste(namefolders,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    getnames <- paste(cbpsDepVar,cbpsbaselineDepVar,sep="_")
    
    #1. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #it has to be the same sample as rural
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Get relevant sample information
    print(length(RuralPoverty04quality[,1]))
    getN <- length(RuralPoverty04quality[,1])
    elecvar <- length(names(RuralPoverty04quality))#just to get the final var/election var
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    getNAs <- length(which(is.na(RuralPoverty04quality[,elecvar])))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    getnewN <- length(RuralPoverty04quality[,1])
    
    #For Quasi-Poisson/Negative Binomial models selection function that excludes influential points
    #For robust OLS select "empty" function that just returns the same df
    RuralPoverty04quality <- inffunc(RuralPoverty04quality,namefolderssave,modlist,nrmod)
    
    getnewNnoInf <- length(RuralPoverty04quality[,1])
    
    #2. Get the reference levels (want this to be the ones with lowest)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    smallestSnr <- onetabS[1,2]
    ndsmallestS <- onetabS[2,1]
    ndsmallestSnr <- onetabS[2,2]
    thrdsmallestS <- onetabS[3,1]
    thrdsmallestSnr <- onetabS[3,2]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    smallestBnr <- onetabB[1,2]
    ndsmallestB <- onetabB[2,1]
    ndsmallestBnr <- onetabB[2,2]
    thrdsmallestB <- onetabB[3,1]
    thrdsmallestBnr <- onetabB[3,2]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    #compare the reference from contrast with the table of smallest to make sure the right one is selected
    checkcontrState <- data.frame(contrasts(RuralPoverty04quality[,"stateName"]))
    checkcontrState$Var <- row.names(checkcontrState)
    checkcontrStateRef <- checkcontrState[which(checkcontrState[,1] < 0),]
    checkcontrStateRef <- checkcontrStateRef$Var
    checkcontrStateRefOrig <- checkcontrStateRef
    
    checkcontrBiome <- data.frame(contrasts(RuralPoverty04quality[,biomvar]))
    checkcontrBiome$Var <- row.names(checkcontrBiome)
    checkcontrBiomeRef <- checkcontrBiome[which(checkcontrBiome[,1] < 0),]
    checkcontrBiomeRef <- checkcontrBiomeRef$Var
    checkcontrBiomeRefOrig <- checkcontrBiomeRef
    
    #3.Make cbps formula with the election variable
    my.formula <- as.formula(reformulate(termlabels=c(cbpsbaselineDepVar,cbpsVars),response=cbpsDepVar))
    addTerm <- paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))
    newmodformula <- update(my.formula,paste("~. +", addTerm))
    
    #4. Make the CBPS weight
    fit3 <- npCBPS(newmodformula, data=RuralPoverty04quality)
    
    #5. Save it
    save(fit3, file = paste(namefolderssave,fitnamesave,sep="",paste(".rda",sep="")))
    
    #6. Get out a log for the weight (sample size and reference)
    tog <- cbind(getnames,getN,getNAs,getnewN,getnewNnoInf,checkcontrStateRefOrig,smallestS,smallestSnr,ndsmallestS,ndsmallestSnr,thrdsmallestS,
                 thrdsmallestSnr,checkcontrBiomeRefOrig,smallestB,smallestBnr,ndsmallestB,ndsmallestBnr,thrdsmallestB,thrdsmallestBnr)
    
    result <- tog
  }
}


#Function to get out key information from CBGPS weights
CBPStable <- function(cbpsoutput=fit2,nameVar="Kalories")
{
  result=NULL
  {
    #correlation for each covariate in model, based on package function plot
    CBPSbox <- plot(cbpsoutput,silent=FALSE,boxplot=TRUE)
    
    #get summary values and gather in a df
    OriginalCorrelationAv <- round((mean(CBPSbox$original)),3)
    OriginalCorrelationMax<- round(max(CBPSbox$original),3)
    CBPSadjustedCorrelationAv <- round((mean(CBPSbox$balanced)),3)
    CBPSadjustedCorrelationMax <-round(max(CBPSbox$balanced),3)
    
    diff2 <- (mean(CBPSbox$balanced)-(mean(CBPSbox$original)))
    changeincorrelation <- round((diff2/OriginalCorrelationAv*100),0)
    
    Models <- nameVar
    
    CBPStable <- data.frame(cbind(Models,OriginalCorrelationAv,OriginalCorrelationMax,CBPSadjustedCorrelationAv,
                                  CBPSadjustedCorrelationMax,changeincorrelation))
    
    CBPStable$OriginalCorrelation = paste(paste(CBPStable$OriginalCorrelationAv,CBPStable$OriginalCorrelationMax,sep=" ("),")")
    CBPStable$CBPSadjustedCorrelation = paste(paste(CBPStable$CBPSadjustedCorrelationAv,CBPStable$CBPSadjustedCorrelationMax,sep=" ("),")")
    
    CBPStable <- subset(CBPStable, select=c("Models","OriginalCorrelation","CBPSadjustedCorrelation","changeincorrelation"))
    
    #a list with each indivdiual correlation, and summary of correlation per weight
    result=list(CBPSbox,CBPStable)
  }
}

#Function that gets only the summary information for each CBGPS weight
getAllCBPStable<- function(folder,file,namefile){
  result <- NULL
  {
    fullmod <- loadRData(paste(folder,file,sep=""))#file
    
    n <- length(fullmod$weights)
    
    CBPStabletest <- CBPStable(fullmod,namefile)
    CBPStableOne <- CBPStabletest[[2]]
    CBPStableOne$Sample_n <- n
    
    result <- CBPStableOne
    
  }
}

#Function that compares the parametric CBGPS (fit2) with non-parametric CBGPS (fit2)
#The output df will make it easy to assess which CBGPS weight is best for each model so that that can be chosen and used in subsequent 
#weighted regression model
FitCompFunc <- function(fit2ones,fit3ones){
  result=NULL
  {
    #allfit2
    allFit2ResDF <- data.frame(do.call("rbind",fit2ones))
    names(allFit2ResDF)[2:5] <- paste(names(allFit2ResDF)[2:5],"_fit2",sep="")
    head(allFit2ResDF)
    
    #allfit3
    allFit3ResDF <- data.frame(do.call("rbind",fit3ones))
    names(allFit3ResDF)[2:5] <- paste(names(allFit3ResDF)[2:5],"_fit3",sep="")
    
    #Tog
    Tog <- merge(x=allFit2ResDF, y=allFit3ResDF, by="Models")
    Tog[c("changeincorrelation_fit2","changeincorrelation_fit3")] <- sapply(Tog[c("changeincorrelation_fit2","changeincorrelation_fit3")], as.character)
    Tog[c("changeincorrelation_fit2","changeincorrelation_fit3")] <- sapply(Tog[c("changeincorrelation_fit2","changeincorrelation_fit3")], as.numeric)
    
    #a categorical variable that says whether fit2 or fit3 is better or worse
    Tog$fit2diff <-ifelse(Tog$changeincorrelation_fit2 < Tog$changeincorrelation_fit3,"Better","Worse")
    Tog$fit2diffCom <- ifelse(Tog$fit2diff=="Worse","But if fit2max lower pick fit2","")
    #in the cases where the average might be lower in fit2 (fit3) BUT the maximum correlation between programme and one covariate is lower in 
    #fit3 (fit2) then pick fit3 (fit2) because we then avoid any covariates with high remaining correlation. Particularly since the average correlation
    #for fit2 and fit3 tend to be very similar
    
    Tog <- subset(Tog, select=c("Models","changeincorrelation_fit2","changeincorrelation_fit3","fit2diff","fit2diffCom",
                                "CBPSadjustedCorrelation_fit2",
                                "CBPSadjustedCorrelation_fit3",
                                "OriginalCorrelation_fit2","OriginalCorrelation_fit3",
                                "Sample_n_fit2","Sample_n_fit3"))
    result <- Tog
    
  }
}


#########################################################################################################

#Functions related to multivariate regression models


#Function that runs three robust OLS models weighted by the parametric CBGPS weights (fit2), 
#i) a model with all control variables and the programme independent variable
#ii) a model with all control variables but no programme indenendnt variable, and
#iii) a model with  all control variables, the programme independent variable, and an interaction term between programme independent variable 
#and state interaction
ROBCBPSmodFuncfit2 <- function(df,FullMod){
  result=NULL
  {
    #to keep the same setup as QP/NB and for this to work in a function
    RuralPoverty04quality <- df
    
    #1.
    my.formula <- FullMod[[1]]
    LinMod <- try(lmrob( my.formula, weights = fit2$weights,data=RuralPoverty04quality,fast.s.large.n = Inf,max.it=500,k.max=2000))
    
    #2.
    my.formula <- FullMod[[2]]
    NullMod <- try(lmrob( my.formula, weights = fit2$weights,data=RuralPoverty04quality,fast.s.large.n = Inf,max.it=500,k.max=2000))
    
    #3.
    my.formula <- FullMod[[3]]
    StateMod <- try(lmrob( my.formula, weights = fit2$weights,data=RuralPoverty04quality,fast.s.large.n = Inf,max.it=500,k.max=2000))
    
    result=list(LinMod,NullMod,StateMod)
  }
}

#Function that runs three robust OLS models weighted by the on-parametric CBGPS weights (fit3), 
#i) a model with all control variables and the programme independent variable
#ii) a model with all control variables but no programme indenendnt variable, and
#iii) a model with  all control variables, the programme independent variable, and an interaction term between programme independent variable 
#and state interaction
ROBCBPSmodFuncfit3 <- function(df,FullMod){
  result=NULL
  {
    #to keep the same setup as QP/NB and for this to work in a function
    RuralPoverty04quality <- df
    
    #1.
    my.formula <- FullMod[[1]]
    LinMod <- try(lmrob( my.formula, weights = fit3$weights,data=RuralPoverty04quality,fast.s.large.n = Inf,max.it=500,k.max=2000))
    
    #2.
    my.formula <- FullMod[[2]]
    NullMod <- try(lmrob( my.formula, weights = fit3$weights,data=RuralPoverty04quality,fast.s.large.n = Inf,max.it=500,k.max=2000))
    
    #3.
    my.formula <- FullMod[[3]]
    StateMod <- try(lmrob( my.formula, weights = fit3$weights,data=RuralPoverty04quality,fast.s.large.n = Inf,max.it=500,k.max=2000))
    
    result=list(LinMod,NullMod,StateMod)
  }
}


#Function that runs three negative binomial models weighted by the parametric CBGPS weights (fit2), 
#i) a model with all control variables and the programme independent variable
#ii) a model with all control variables but no programme indenendnt variable, and
#iii) a model with  all control variables, the programme independent variable, and an interaction term between programme independent variable 
#and state interaction
NBmodFuncfit2 <- function(df,FullMod,cbpsweights){
  result=NULL
  {
    RuralPoverty04quality <- df
    
    #1.
    my.formula <- FullMod[[1]]
    NBlin <- try(glm.nb(my.formula, weights = fit2$weights,data=RuralPoverty04quality,link=log,control=glm.control(maxit=200)))
    
    #2.
    my.formula <- FullMod[[2]]
    nullNBlin <- try(glm.nb(my.formula, weights = fit2$weights,data=RuralPoverty04quality,link=log,control=glm.control(maxit=200)))
    
    #3.
    my.formula <- FullMod[[3]]
    NBstate <- try(glm.nb(my.formula, weights = fit2$weights,data=RuralPoverty04quality,link=log,control=glm.control(maxit=200)))
    
    result=list(NBlin,nullNBlin,NBstate)
    
  }
}

#Function that runs three negative binomial models weighted by the non-parametric CBGPS weights (fit3), 
#i) a model with all control variables and the programme independent variable
#ii) a model with all control variables but no programme indenendnt variable, and
#iii) a model with  all control variables, the programme independent variable, and an interaction term between programme independent variable 
#and state interaction

NBmodFuncfit3 <- function(df,FullMod,cbpsweights){
  result=NULL
  {
    RuralPoverty04quality <- df
    
    #1.
    my.formula <- FullMod[[1]]
    NBlin <- try(glm.nb(my.formula, weights = fit3$weights,data=RuralPoverty04quality,link=log,control=glm.control(maxit=200)))
    
    #2.
    my.formula <- FullMod[[2]]
    nullNBlin <- try(glm.nb(my.formula, weights = fit3$weights,data=RuralPoverty04quality,link=log,control=glm.control(maxit=200)))
    
    #3.
    my.formula <- FullMod[[3]]
    NBstate <- try(glm.nb(my.formula, weights = fit3$weights,data=RuralPoverty04quality,link=log,control=glm.control(maxit=200)))
    
    result=list(NBlin,nullNBlin,NBstate)
    
  }
}



#Function that runs three Poisson and three Quasi-Poisson models weighted by the parametric CBGPS weights (fit2)
#Note, Quasi-Poisson models are the ones reported and assessed in the paper
#The Poisson models are run and used to create quasi-AIC

#Poisson models

#i) a model with all control variables and the programme independent variable
#ii) a model with all control variables but no programme indenendnt variable, and
#iii) a model with  all control variables, the programme independent variable, and an interaction term between programme independent variable 
#and state interaction

#Quasi-poisson models

#iv) a model with all control variables and the programme independent variable
#x) a model with all control variables but no programme indenendnt variable, and
#xi) a model with  all control variables, the programme independent variable, and an interaction term between programme independent variable 
#and state interaction

QPmodFuncfit2 <- function(df,FullMod){
  result=NULL
  {
    RuralPoverty04quality <- df
    
    #1. First need to run the Poisson models
    
    #1.
    my.formula <- FullMod[[1]]
    Poisson <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=poisson,control=glm.control(maxit=10000)))
    
    #2.
    my.formula <- FullMod[[2]]
    nullPoisson <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=poisson,control=glm.control(maxit=10000)))
    
    #3.
    my.formula <- FullMod[[3]]
    Poissonstate <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=poisson,control=glm.control(maxit=10000)))
    
    ####################################################################################
    
    #2. Then run the Quasi Poisson models
    
    #4.
    my.formula <- FullMod[[1]]
    QuasiPoisson <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=quasipoisson,control=glm.control(maxit=10000)))
    
    #5.
    my.formula <- FullMod[[2]]
    nullQuasiPoisson <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=quasipoisson,control=glm.control(maxit=10000)))
    
    #6.
    my.formula <- FullMod[[3]]
    QuasiPoissonstate <- try(glm(my.formula, weights = fit2$weights,data=RuralPoverty04quality,family=quasipoisson,control=glm.control(maxit=10000)))
    
    result=list(Poisson,nullPoisson,Poissonstate,QuasiPoisson,nullQuasiPoisson,QuasiPoissonstate)
    
  }
}


#Function that runs three Poisson and three Quasi-Poisson models weighted by the non-parametric CBGPS weights (fit3)
#Note, Quasi-Poisson models are the ones reported and assessed in the paper
#The Poisson models are run and used to create quasi-AIC

#Poisson models

#i) a model with all control variables and the programme independent variable
#ii) a model with all control variables but no programme indenendnt variable, and
#iii) a model with  all control variables, the programme independent variable, and an interaction term between programme independent variable 
#and state interaction

#Quasi-poisson models

#iv) a model with all control variables and the programme independent variable
#x) a model with all control variables but no programme indenendnt variable, and
#xi) a model with  all control variables, the programme independent variable, and an interaction term between programme independent variable 
#and state interaction
QPmodFuncfit3 <- function(df,FullMod){
  result=NULL
  {
    RuralPoverty04quality <- df
    
    #1. First need to run the Poisson models
    
    #1.
    my.formula <- FullMod[[1]]
    Poisson <- try(glm(my.formula, weights = fit3$weights,data=RuralPoverty04quality,family=poisson,control=glm.control(maxit=10000)))
    
    #2.
    my.formula <- FullMod[[2]]
    nullPoisson <- try(glm(my.formula, weights = fit3$weights,data=RuralPoverty04quality,family=poisson,control=glm.control(maxit=10000)))
    
    #3.
    my.formula <- FullMod[[3]]
    Poissonstate <- try(glm(my.formula, weights = fit3$weights,data=RuralPoverty04quality,family=poisson,control=glm.control(maxit=10000)))
    
    ####################################################################################
    
    #2. Then run the Quasi Poisson models
    
    #4.
    my.formula <- FullMod[[1]]
    QuasiPoisson <- try(glm(my.formula, weights = fit3$weights,data=RuralPoverty04quality,family=quasipoisson,control=glm.control(maxit=10000)))
    
    #5.
    my.formula <- FullMod[[2]]
    nullQuasiPoisson <- try(glm(my.formula, weights = fit3$weights,data=RuralPoverty04quality,family=quasipoisson,control=glm.control(maxit=10000)))
    
    #6.
    my.formula <- FullMod[[3]]
    QuasiPoissonstate <- try(glm(my.formula, weights = fit3$weights,data=RuralPoverty04quality,family=quasipoisson,control=glm.control(maxit=10000)))
    
    result=list(Poisson,nullPoisson,Poissonstate,QuasiPoisson,nullQuasiPoisson,QuasiPoissonstate)
    
  }
}


#Function that makes the correct formulas for the final core models, i.e. with all relevant independent and control variables
FullModListRedElectionScale <- function(timeperiod,progr,Depvar,IndepVarLin, baselineDepVar,stateName, xvars,intLin){
  result=NULL
  {
    Election <- paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))
    Election <- paste("scale(",Election,sep="",paste(")",sep=""))
    LinMod <- as.formula(reformulate(termlabels=c(IndepVarLin,baselineDepVar,stateName, xvars,Election),response=Depvar))
    NullMod <- as.formula(reformulate(termlabels=c(baselineDepVar,stateName, xvars,Election),response=Depvar))
    StateMod <- as.formula(reformulate(termlabels=c(intLin, baselineDepVar, xvars,Election),response=Depvar))
    result=list(LinMod,NullMod,StateMod)
  }
}


#Function to get the correct sample, set the correct contrasts, select the correct CBGPS weights and the correct model type
SampleFunc <- function(namefolders,script,start1,end1,yearsample,timeperiod,progr,biomvar,savefolder,fit2name,fit3name,modlistfunc,modfunc,fitnamesave){
  result=NULL
  {
    #1.Load in script to get correct sample 
    script <- paste(namefolders,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    
    #2. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #it has to be the same sample as rural sample
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Print some key sample information
    print(length(RuralPoverty04quality[,1]))
    elecvar <- length(names(RuralPoverty04quality))
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    
    #2. Get the reference levels (want this to be the ones with lowest)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    #3. Load in the CBGPS weights
    load(paste(savefolder,fit2name,sep=""),envir=globalenv())#I save it to global env then can call it from there
    load(paste(savefolder,fit3name,sep=""),envir=globalenv())
    
    #4. Decide which model list to have - either with or without election variable!
    FullMod <- modlistfunc(timeperiod,progr,Depvar,IndepVarLin, baselineDepVar,stateName, xvars,intLin)
    
    #5. Get the final models, specify which type of model to run
    AllMods <- modfunc(RuralPoverty04quality,FullMod)
    
    #6. Save it in the same folder as found the weights in
    save(AllMods, file = paste(savefolder,fitnamesave,sep="",paste(".rda",sep="")))
    
    result <- AllMods
    
  }
}


#Function to identify and influential points based on dfbetas
dfBetaFuncNEW <- function(df,mod,modnr,mainVar,originv){
  result=Null
  {
    
    #Get dfbetas when threshold is 2/sqrt(N)
    RuralPoverty04quality <- cbind(df,  DfBeta=dfbeta(mod[[modnr]])[,mainVar],
                                   DfBetaS=dfbetas(mod[[modnr]])[,mainVar])
    
    (dfbetas.cutoff <- 2/sqrt(length(RuralPoverty04quality$DfBetaS)))
    OptionSqrt <-subset(RuralPoverty04quality, abs(DfBetaS)>dfbetas.cutoff)
    OptionSqrt <- subset(OptionSqrt, select=c("IBGECode7digit","DfBetaS","DfBeta",originv,"stateName"))
    
    #Order by dfbetas again
    OptionSqrt <- OptionSqrt [order(OptionSqrt$DfBetaS),]
    
    #Get just a vector of the IBGEcode so I can subset from that!
    OptionSqrtVect <- as.vector(OptionSqrt$IBGECode7digit)
    
    #Would be nice to know how many in total, how many positive and how many negative
    #Get a summary table
    states <- data.frame(table(OptionSqrt$stateName))
    states <- states [order(states$Freq),]
    states$Var1 <- as.character(states$Var1)
    Total <- c("Total",length(OptionSqrt$IBGECode7digit))
    TotalPos <- c("TotalPos",length(which(OptionSqrt$DfBetaS>0)))
    TotalNeg <- c("TotalNeg",length(which(OptionSqrt$DfBetaS<0)))
    Together <- rbind(states,Total,TotalPos,TotalNeg)
    row.names(Together) <- NULL
    
    #IF I used a higher threshold of 1
    ix.dfbs.gt <- which(abs(OptionSqrt$DfBetaS) > 1)#this gets both positive and negative above threshold!
    new <- data.frame(OptionSqrt[ix.dfbs.gt,])
    newVect <- as.vector(new$IBGECode7digit)
    
    result = list(dfbetas.cutoff,Together,OptionSqrt,OptionSqrtVect,new,newVect)
    
  }
}

#Exclude influential points
sqrtFunc <- function(df,savefolder,modlistfile,nrmod){
  result=NULL
  {
    #Read in df and mod list
    RuralPoverty04quality <- df
    
    fullmod <- loadRData(paste(savefolder,modlistfile,sep=""))
    
    #Find the influential points for programme variable
    IndepVarLinNorm <- gsub("log10scaledMain","",IndepVarLin)
    NewDFbetasLinNEW <- dfBetaFuncNEW(RuralPoverty04quality,fullmod,nrmod, IndepVarLin,IndepVarLinNorm)
    
    #Remove influential points
    NoInfluenceSqrt <- NewDFbetasLinNEW[[4]]
    
    RuralPoverty04quality <- RuralPoverty04quality[!RuralPoverty04quality$IBGECode7digit %in% NoInfluenceSqrt,]
    
    result <- RuralPoverty04quality
  }
}

#For models which do not require excluding influential points put this as an "empty" function, i.e. does not exclude anything
#but uses the same argument as those models that do and use sqrtFunc
sqrtFuncEmpty <- function(df,savefolder,modlistfile,nrmod){
  result=NULL
  {
    
    #Read in df
    RuralPoverty04quality <- df
    
    #Dont do anything, just print the df back out
    result <- RuralPoverty04quality
  }
}


#New model function to get the correct sample, set the correct contrasts, select the correct CBGPS weights and the correct model type
#Included here is the exclusion of influential points

SampleFuncNew <- function(namefolders,script,start1,end1,yearsample,timeperiod,progr,
                          inffunc,savefolder,modlist,nrmod,
                          biomvar,fit2name,fit3name,modlistfunc,modfunc,fitnamesave){
  result=NULL
  {
    #Get sample
    script <- paste(namefolders,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    
    #2. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #it has to be the same sample as rural
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Get relevant sample information
    print(length(RuralPoverty04quality[,1]))
    elecvar <- length(names(RuralPoverty04quality))
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    
    #Take out influential points when QP/NB models, leave as is when robust OLS
    RuralPoverty04quality <- inffunc(RuralPoverty04quality,savefolder,modlist,nrmod)
    
    #2. Get the reference levels (want this to be the ones with lowest)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    #3. Load in the new weights
    load(paste(savefolder,fit2name,sep=""),envir=globalenv())#save it to global env
    load(paste(savefolder,fit3name,sep=""),envir=globalenv())
    
    #4. Decide which model list to have - either with or without election variable!
    FullMod <- modlistfunc(timeperiod,progr,Depvar,IndepVarLin, baselineDepVar,stateName, xvars,intLin)
    
    #5. Get the final models
    AllMods <- modfunc(RuralPoverty04quality,FullMod)
    
    #6. Save it
    save(AllMods, file = paste(savefolder,fitnamesave,sep="",paste(".rda",sep="")))
    
    result <- AllMods
  }
}


SampleFuncNewReduced <- function(namefolders,script,start1,end1,yearsample,timeperiod,progr,
                                 inffunc,savefolder,modlist,nrmod,
                                 biomvar,fit2name,fit3name,modlistfunc){
  result=NULL
  {
    #Get sample
    script <- paste(namefolders,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    
    #2. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #it has to be the same sample as rural
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Get relevant sample information
    print(length(RuralPoverty04quality[,1]))
    elecvar <- length(names(RuralPoverty04quality))
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    
    #Take out influential points when QP/NB models, leave as is when robust OLS
    RuralPoverty04quality <- inffunc(RuralPoverty04quality,savefolder,modlist,nrmod)
    
    #2. Get the reference levels (want this to be the ones with lowest)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    #3. Load in the new weights
    load(paste(savefolder,fit2name,sep=""),envir=globalenv())#save it to global env
    load(paste(savefolder,fit3name,sep=""),envir=globalenv())
    
    #4. Decide which model list to have - either with or without election variable!
    FullMod <- modlistfunc(timeperiod,progr,Depvar,IndepVarLin, baselineDepVar,stateName, xvars,intLin)
    
    result <- list(RuralPoverty04quality,FullMod)
  }
}


######################################################################################

#Function to make quasi-AIC (for Quasi-Poisson models)
dfun <- function(object){
  with(object,sum((weights * residuals^2)[weights > 0])/df.residual)
}

qAICfunc <- function(x,y,z){
  tryCatch(
    # This is what I want to do.
    # If executing more than one expression you need curly braces.
    {
      
      r2adj = summary(x)$adj.r#I first try the adj.r on each model. If its a NB/QP model youll get NULL
      r2adj <- round(r2adj,digits=6)#which means here youll get the error. If error then it moves on to AIC
      
      return(r2adj)
    },
    # ... if an error occurs, then I try to make an AIC
    error=function(error_message) {
      message("r2adj did not work")
      message("And below is the error message from R:")
      message(error_message)
      
      AICComp <- AIC(x)
      AICComp <- ifelse(is.na(AICComp),"NA",AICComp)#this is to generate an error for QP when it gets NA!
      AICComp <- round(AICComp,digits=6)#here if it is not numeric I will get an error!
      
      return(AICComp)
    }, 
    # ... if an error occurs, then it will do the final option, i.e. qAIC!
    error=function(error_message) {
      message("AIC did not work")
      message("And below is the error message from R:")
      message(error_message)
      
      #First get the poisson model as basis for aic
      Poismod <- y[[z]]
      
      #Change in aic
      AICBqp = (qAIC(Poismod,dispersion=dfun(Poismod)))
      AICBqp <- round(AICBqp,digits=6)
      
      return(AICBqp)
    }
    
  )
}

#Function to get adjusted r2 (for robust OLS models), or AIC (for Negative Binomial models)
r2AICfunc <- function(x,y,z){
  tryCatch(
    # This is what I want to do.
    # If executing more than one expression you need curly braces.
    {
      
      r2adj = summary(x)$adj.r#I first try the adj.r on each model. If its a NB/QP model I get NULL
      r2adj <- round(r2adj,digits=6)#which means here I get the error. If error then it moves on to AIC
      
      return(r2adj)
    },
    # ... but if an error occurs, then I try to make an AIC
    error=function(error_message) {
      message("r2adj did not work")
      message("And below is the error message from R:")
      message(error_message)
      
      AICComp <- AIC(x)
      AICComp <- ifelse(is.na(AICComp),"NA",AICComp)#this is to generate an error for QP when it gets NA!
      AICComp <- round(AICComp,digits=6)#here if it is not numeric I will get an error!
      
      return(AICComp)
    }
  )
}


#Function to compare best fit of models
CompModFuncFin <- function(namefolders,script,start1,end1,savefolder,linfile,nrFileLin,nrFileStateLin,aicfunc,intname){
  result=NULL
  {
    
    script <- paste(namefolders,script,sep="")
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    fullmod <- loadRData(paste(savefolder,linfile,sep=""))#the model list
    
    onemodB <- fullmod[[nrFileLin]]#full mod
    onemodC <- fullmod[[nrFileStateLin]]#full model with interaction
    
    #Get nr of observations from model
    nrObsNorm <- length(onemodB$fitted.values) 
    
    #All summary statistics
    
    #1. full mod coeficient, SE and pvalue
    CoefLin <- round(summary(onemodB)$coefficients[IndepVarLin,1],digits=5)
    CoefSELin <- round(summary(onemodB)$coefficients[IndepVarLin,2],digits=5)
    PvalLin <- summary(onemodB)$coefficients[IndepVarLin,4]
    
    #2. full mod with interaction coeficient, SE and pvalue
    CoefStateLin <- round(summary(onemodC)$coefficients[IndepVarLin,1],digits=5)
    CoefSEStateLin <- round(summary(onemodC)$coefficients[IndepVarLin,2],digits=5)
    PvalStateLin <- summary(onemodC)$coefficients[IndepVarLin,4]
    
    print(IndepVarLin)
    
    #3. Get the nr of significant states/biomes
    allcoefs <- data.frame(summary(onemodC)$coefficients)
    allcoefs$Vars <- row.names(allcoefs)
    row.names(allcoefs) <- NULL
    allcoefs <- subset(allcoefs, select=c(5,1:4))
    allcoefs$Vars <- as.character(allcoefs$Vars)
    
    allcoefsState <- allcoefs[grepl(":", allcoefs$Vars),]
    signstate <- length(which(allcoefsState[,5]< 0.05))
    
    #Get the adjusted r2/AIC - chose function depending on type of model
    #Robust OLS use adjusted r2, Negative binomial use AIC, Quasi-Poisson use quasi-AIC
    #For quasi-AIC have to specify the poisson model, so introduce a second argument
    
    #use the nrpick argument to select either the full (1) or interaction (3) poisson model
    #when using QP models. then compare those two together
    modr2Linear <- aicfunc(onemodB,fullmod,1)
    modr2StateLinear <- aicfunc(onemodC,fullmod,3)
    
    #Change in r/aic
    adjR2change <- modr2StateLinear-modr2Linear
    adjR2change <- round(adjR2change,digits=6)
    
    #Assess significance of interaction - run an Anova
    #function has incorporated possibility of error in ANOVA
    my_anova_fun <- function(x){
      tryCatch(
        {
          
          #Anova for the interaction
          anovaSig <- data.frame(Anova(x, type="3"))
          anovaSig$Vars <- row.names(anovaSig)
          row.names(anovaSig) <- NULL
          anovaSig <- subset(anovaSig, select=c(4,1:3))
          anovaSig$Vars <- as.character(anovaSig$Vars)
          anovaSigState <- anovaSig[grepl(intname, anovaSig$Vars),]
          names(anovaSigState)[4] <- "Pval"
          anovaSigState$Pval <- round(anovaSigState$Pval,digits=8)
          
          colnames(anovaSigState)[colnames(anovaSigState)=="LR.Chisq"] <- "Chisq"
          anovaSigState <- subset(anovaSigState, select=c("Vars","Df","Chisq","Pval"))
          
          
          return(anovaSigState)
        },
        # ... but if an error occurs, write error message and write a df with NAs: 
        error=function(error_message) {
          message("Anova failed")
          message(error_message)
          
          anovaSigState <- data.frame(t(c(NA,NA,NA,NA)))
          names(anovaSigState) <- c("Vars","Df","Chisq","Pval")
          
          return(anovaSigState)
        }
      )
    }
    
    anovaSigState <- my_anova_fun(onemodC)
    
    #Final table
    Tog <- cbind(linfile,nrObsNorm,Depvar,IndepVarLin,CoefLin,CoefSELin,PvalLin,CoefStateLin,CoefSEStateLin,PvalStateLin,signstate,
                 modr2Linear,modr2StateLinear,adjR2change,anovaSigState)
    
    
    result <- Tog
    
  }
}

##################################################################################################################################

tempfunc <- function(namefolders,script,start1,end1,yearsample,timeperiod,progr,biomvar,namefolderssave,fitnamesave){
  result=NULL
  {
    #Get the sample that you need - based on individual scripts
    #get directory first - same I will use to save the new file
    script <- paste(namefolders,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    getnames <- paste(cbpsDepVar,cbpsbaselineDepVar,sep="_")
    
    #1. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #Include here that it has to be the same sample as rural!
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Print some useful information, and gather key sample information to be saved in a df at the end
    print(length(RuralPoverty04quality[,1]))
    getN <- length(RuralPoverty04quality[,1])
    elecvar <- length(names(RuralPoverty04quality))
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    getNAs <- length(which(is.na(RuralPoverty04quality[,elecvar])))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    getnewN <- length(RuralPoverty04quality[,1])
    
    #2. Get the reference levels (want this to be the ones with the least observations)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    smallestSnr <- onetabS[1,2]
    ndsmallestS <- onetabS[2,1]
    ndsmallestSnr <- onetabS[2,2]
    thrdsmallestS <- onetabS[3,1]
    thrdsmallestSnr <- onetabS[3,2]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    smallestBnr <- onetabB[1,2]
    ndsmallestB <- onetabB[2,1]
    ndsmallestBnr <- onetabB[2,2]
    thrdsmallestB <- onetabB[3,1]
    thrdsmallestBnr <- onetabB[3,2]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    #Then can compare the reference from contrast with the table of smallest to make sure the right one is selected
    
    checkcontrState <- data.frame(contrasts(RuralPoverty04quality[,"stateName"]))
    checkcontrState$Var <- row.names(checkcontrState)
    checkcontrStateRef <- checkcontrState[which(checkcontrState[,1] < 0),]
    checkcontrStateRef <- checkcontrStateRef$Var
    checkcontrStateRefOrig <- checkcontrStateRef
    
    checkcontrBiome <- data.frame(contrasts(RuralPoverty04quality[,biomvar]))
    checkcontrBiome$Var <- row.names(checkcontrBiome)
    checkcontrBiomeRef <- checkcontrBiome[which(checkcontrBiome[,1] < 0),]
    checkcontrBiomeRef <- checkcontrBiomeRef$Var
    checkcontrBiomeRefOrig <- checkcontrBiomeRef
    
    #3.Make cbgps formula, adding on the election variable
    my.formula <- as.formula(reformulate(termlabels=c(cbpsbaselineDepVar,cbpsVars),response=cbpsDepVar))
    addTerm <- paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))
    newmodformula <- update(my.formula,paste("~. +", addTerm))
    
    #4. Get out a log for the weight (sample size and reference)
    tog <- cbind(getnames,getN,getNAs,getnewN,checkcontrStateRefOrig,smallestS,smallestSnr,ndsmallestS,ndsmallestSnr,thrdsmallestS,thrdsmallestSnr,
                 checkcontrBiomeRefOrig,smallestB,smallestBnr,ndsmallestB,ndsmallestBnr,thrdsmallestB,thrdsmallestBnr)
    
    result <- tog
  }
}

#####################################################################################################################


#Getting all moran with election variable

#Function to create neighbourhood matrices and run Morans I test
NewMoranFunc2019 <- function(shape,ResDf,ResidualVar){
  result=NULL
  {
    
    #merge with residuals
    vdc_2001<-merge(shape, ResDf, by.x="GEOCODIGO", by.y="IBGECode7digit",
                    all.x=FALSE, all.y=TRUE)
    vdc_2001 <- vdc_2001[!duplicated(vdc_2001$GEOCODIGO),]#take away duplicates in shapefile
    
    #preprocess - create coordinate object and ID object
    coords<-coordinates(vdc_2001)
    summary(coords)
    IDs<-row.names(as(vdc_2001, "data.frame"))
    
    #OPTION 1 - contiguity queen
    vdc_2001_nbq<-poly2nb(vdc_2001,queen=TRUE)
    
    #spatial weights
    vdc_2001_nbq_w<- nb2listw(vdc_2001_nbq,style="W",zero.policy=T)#Weight 1
    
    #OPTION 2 - distance
    
    #create nearest neighbor
    poverty_kn1 <- knn2nb(knearneigh(coords, k = 1), row.names = IDs)
    
    #calculate distance of nearest neighbor
    dist <- unlist(nbdists(poverty_kn1, coords))  # Define distance
    
    #select max distance and define neighbors up to certain distance
    max_k1<-max(dist)
    poverty_kd1 <- dnearneigh(coords, d1 = 0, d2 = 0.75 * max_k1, row.names = IDs)
    
    #spatial weights based on inverse distance
    SIDdlist = nbdists(poverty_kd1,coords)
    SIDdlist = lapply(SIDdlist, function(x) 1/x)
    
    vdc_2001_prop10_inv = nb2listw(poverty_kd1, glist=SIDdlist,zero.policy=TRUE)#Weight 2
    
    #Do the MOran test
    Border <- moran.test(ResDf[,ResidualVar], listw=vdc_2001_nbq_w,alternative="two.sided",zero.policy=T)
    Distance <- moran.test(ResDf[,ResidualVar], listw=vdc_2001_prop10_inv,alternative="two.sided",zero.policy=T)
    
    #Put in a table
    BorderTab <- data.frame(t(Border$estimate))
    DistanceTab <- data.frame(t(Distance$estimate))
    Tog <- rbind(BorderTab,DistanceTab)
    Type <- c("Border","Distance")
    Pvalue <- c(Border$p.value,Distance$p.value)
    
    FinalTab <- cbind(Type,Tog,Pvalue)
    
    result <- FinalTab
  }
}


#Function for all Morans I/spatial autocorrelation steps
NewestMoranALL2019Elec <- function(folder,script,start1,end1,yearsample,timeperiod,progr,inffunc,namefolders,modlist,nrmod,biomvar,
                                   linfile,nrFile,shapeVar){
  result=NULL
  {
    
    #1.Load in the df needed based on sample script
    script <- paste(folder,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    
    #2. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #it has to be the same sample as rural!
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Get sample information
    print(length(RuralPoverty04quality[,1]))
    elecvar <- length(names(RuralPoverty04quality))
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    
    #get sample without influential points (for non-OLS models)
    RuralPoverty04quality <- inffunc(RuralPoverty04quality,namefolders,modlist,nrmod)
    
    #set the correct reference state/biome
    #2. Get the reference levels (want this to be the ones with lowest)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    ##################################################################################################################
    ##################################################################################################################
    
    #Make propensity score model with election variable
    
    Election <- paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))
    
    #1. Make Propensity score model and get residuals
    my.formula <- as.formula(reformulate(termlabels=c(cbpsbaselineDepVar,cbpsVars,Election),response=cbpsDepVar))
    PropMod <- lm(my.formula,data=RuralPoverty04quality)

    RuralPoverty04quality$PropModresiduals <- resid(PropMod,type="resp")
    
    #I verify that the residuals etc. are put on correctly!
    RuralPoverty04quality$ModCheckProp <- resid(PropMod,type="resp")+PropMod$fitted.values
    
    RuralPoverty04quality$ModCheckProp <- round(RuralPoverty04quality$ModCheckProp, digits=8)
    RuralPoverty04quality[,cbpsDepVar] <- round(RuralPoverty04quality[,cbpsDepVar],digits=8)
    RuralPoverty04quality$ModCheckPropDiff <- RuralPoverty04quality[,cbpsDepVar]-RuralPoverty04quality$ModCheckProp
    
    print(paste("The difference in Prop mod fitted.values+residuals AND the model outcome variable -",cbpsDepVar,paste("=",sum(RuralPoverty04quality$ModCheckPropDiff))))
    
    ##################################################################################
    
    #2. load final mod residuals (outcome mod) 
    fullmod <- loadRData(paste(namefolders,linfile,sep=""))
    onemodB <- fullmod[[nrFile]]
    
    #I verify here that the residuals etc. are put on correctly
    RuralPoverty04quality$ModCheckOutcomeMod <- resid(onemodB,type="resp")+onemodB$fitted.values
    
    RuralPoverty04quality$ModCheckOutcomeMod <- round(RuralPoverty04quality$ModCheckOutcomeMod, digits=8)
    RuralPoverty04quality[,Depvar] <- round(RuralPoverty04quality[,Depvar],digits=8)
    RuralPoverty04quality$ModCheckOutcomeModDiff <- RuralPoverty04quality[,Depvar]-RuralPoverty04quality$ModCheckOutcomeMod
    
    print(paste("The difference in Outcome mod fitted.values+residuals AND model outcome variable -",Depvar,paste("=",sum(RuralPoverty04quality$ModCheckOutcomeModDiff))))
    
    RuralPoverty04quality$OutcomeModresiduals <- resid(onemodB,type="resp")
    
    ####################################################################################################################
    
    #3. Get the correct shapefile

    shape=readOGR(paste("Data/Municipalities/",sep="",paste(shapeVar,".shp",sep="")), layer=shapeVar)#put in shapename
    
    #####################################################################################################################
    
    #4.1 Autocorr for Propensity model
    fit2ResidDf <- subset(RuralPoverty04quality, select=c("IBGECode7digit","PropModresiduals"))
    
    #with the shapefile Ive loaded make the moran statistics
    
    MoranProp <- NewMoranFunc2019(shape,fit2ResidDf,"PropModresiduals")
    write.csv(MoranProp, file=paste(namefolders,linfile,sep="",paste("_",nrFile,sep="",paste("_MoranTablePropensityWElection.csv",sep=""))),row.names=FALSE)
    
    #4.2 Autocorr for Outcome model
    fit2ResidDf <- subset(RuralPoverty04quality, select=c("IBGECode7digit","OutcomeModresiduals"))
    
    MoranOutcome <- NewMoranFunc2019(shape,fit2ResidDf,"OutcomeModresiduals")
    write.csv(MoranOutcome, file=paste(namefolders,linfile,sep="",paste("_",nrFile,sep="",paste("MoranTableOutcomeWElection.csv",sep=""))),row.names=FALSE)
    
    ###########################################################################################
    
    #Then get a final table of results
    
    names(MoranProp)[2:5] <- paste(names(MoranProp)[2:5],"_prop",sep="")
    MoranProp$Model <- paste(namefolders,linfile,sep="",paste("_",nrFile,sep=""))
    MoranProp <- subset(MoranProp, select=c(6,1:5))
    
    names(MoranOutcome)[2:5] <- paste(names(MoranOutcome)[2:5],"_outcome",sep="")
    MoranOutcome$Model <- paste(namefolders,linfile,sep="",paste("_",nrFile,sep=""))
    MoranOutcome <- subset(MoranOutcome, select=c(6,1:5))
    
    FinTab <- merge(MoranProp, MoranOutcome, by=c("Model","Type"))
    
    result <- FinTab
    
  }
}

#To save files as excel
save.xlsx <- function (file, ...)
{
  require(xlsx, quietly = TRUE)
  objects <- list(...)
  fargs <- as.list(match.call(expand.dots = TRUE))
  objnames <- as.character(fargs)[-c(1, 2)]
  nobjects <- length(objects)
  for (i in 1:nobjects) {
    if (i == 1)
      write.xlsx(objects[[i]], file, sheetName = objnames[i])
    else write.xlsx(objects[[i]], file, sheetName = objnames[i],
                    append = TRUE)
  }
  print(paste("Workbook", file, "has", nobjects, "worksheets."))
}

###########################################################################################################

ResidCorreEndogeneityWElection <- function(folder,script,start1,end1,yearsample,timeperiod,progr,
                                           inffunc,namefolders,modlist,nrmod,biomvar,
                                           linfile,nrFile){
  result=NULL
  {

    
    #1.Load in the sample needed based on sample script
    script <- paste(folder,script,sep="")
    
    sourcePartial(script,startTag="#s1",endTag="#s2")
    sourcePartial(script,startTag="#start1_",endTag="#fin1_")
    sourcePartial(script,"#start2_","#fin2_")
    sourcePartial(script,start1,end1)
    
    print(c(cbpsDepVar,cbpsbaselineDepVar))
    
    #2. Get on the election variable
    ElecDF <- read.csv(paste("Data/Elections/AllElectionPerMun_",yearsample,sep="",paste("sample_Fin.csv",sep="")),
                       header=TRUE, sep=",")
    
    ElecDFsub <- subset(ElecDF, select=c("codeibge",paste("PresElec_vs_",timeperiod,sep="",paste("_w_",progr,sep=""))))
    names(ElecDFsub)[1] <- "IBGECode7digit"
    
    #it has to be the same sample as rural
    RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=ElecDFsub,by="IBGECode7digit",all.x=T)
    
    #Get sample information
    print(length(RuralPoverty04quality[,1]))
    elecvar <- length(names(RuralPoverty04quality))
    print(length(which(is.na(RuralPoverty04quality[,elecvar]))))
    RuralPoverty04quality <- RuralPoverty04quality[!is.na(RuralPoverty04quality[,elecvar]),]
    print(length(RuralPoverty04quality[,1]))
    
    #get sample without influential points (for non-OLS models)
    RuralPoverty04quality <- inffunc(RuralPoverty04quality,namefolders,modlist,nrmod)
    
    #set the correct reference state/biome
    #2. Get the reference levels (want this to be the ones with lowest)
    #2.1. Cannot have empty levels
    RuralPoverty04quality$stateName <- droplevels(RuralPoverty04quality$stateName)
    RuralPoverty04quality[,biomvar] <- droplevels(RuralPoverty04quality[,biomvar])
    
    #2.2. Find the category with least observations
    
    #State
    onetabS <- data.frame(table(RuralPoverty04quality[,"stateName"]))
    onetabS <- onetabS [order(onetabS$Freq),]
    onetabS$Var1 <- as.character(onetabS$Var1)
    
    smallestS <- onetabS[1,1]
    
    #Biome
    onetabB <- data.frame(table(RuralPoverty04quality[,biomvar]))
    onetabB <- onetabB [order(onetabB$Freq),]
    onetabB$Var1 <- as.character(onetabB$Var1)
    
    smallestB <- onetabB[1,1]
    
    #2.3. Set the smallest state/biome as the reference (the last one)
    
    #state Reference
    stateSeq <- levels(RuralPoverty04quality$stateName)
    stateSeqNoRef <- stateSeq [! stateSeq %in% smallestS]
    stateSeqFin <- c(stateSeqNoRef,smallestS)#this ensures that the smallest one is last!
    
    #biome Reference
    biomeSeq <- levels(RuralPoverty04quality[,biomvar])
    biomeSeqNoRef <- biomeSeq [! biomeSeq %in% smallestB]
    biomeSeqFin <- c(biomeSeqNoRef,smallestB)#this ensures that the smallest one is last!
    
    #set the level
    RuralPoverty04quality$stateName <- factor(RuralPoverty04quality$stateName, levels = stateSeqFin)
    RuralPoverty04quality[,biomvar] <- factor(RuralPoverty04quality[,biomvar], levels = biomeSeqFin)
    
    #2.4. Set the contrast
    contrasts(RuralPoverty04quality$stateName) <- "contr.Sum"
    contrasts(RuralPoverty04quality[,biomvar]) <- "contr.Sum"
    
    ##################################################################################################################
    ##################################################################################################################
    
    #1. load final mod residuals (outcome mod) 
    fullmod <- loadRData(paste(namefolders,linfile,sep=""))
    onemodB <- fullmod[[nrFile]]
    
    #I verify here that the residuals etc. are put on correctly
    RuralPoverty04quality$ModCheckOutcomeMod <- resid(onemodB,type="resp")+onemodB$fitted.values
    
    RuralPoverty04quality$ModCheckOutcomeMod <- round(RuralPoverty04quality$ModCheckOutcomeMod, digits=8)
    RuralPoverty04quality[,Depvar] <- round(RuralPoverty04quality[,Depvar],digits=8)
    RuralPoverty04quality$ModCheckOutcomeModDiff <- RuralPoverty04quality[,Depvar]-RuralPoverty04quality$ModCheckOutcomeMod
    
    print(paste("The difference in Outcome mod fitted.values+residuals AND model outcome variable -",Depvar,paste("=",sum(RuralPoverty04quality$ModCheckOutcomeModDiff))))
    
    RuralPoverty04quality$OutcomeModresiduals <- resid(onemodB,type="resp")
    
    ####################################################################################################################
    
    #2. Run correlation
    
    #2.1. For outcome model
    
    x2 <- RuralPoverty04quality[c("OutcomeModresiduals",IndepVarLin)]
    y2 <- RuralPoverty04quality[c("OutcomeModresiduals",IndepVarLin)]
    OutcomeSpearmanCorr <- cor(x2, y2, method="spearman")
    Outcome_SpearmanCorr <- OutcomeSpearmanCorr[1,2]
    OutcomePearsonCorr <- cor(x2, y2, method="pearson")
    Outcome_PearsonCorr <- OutcomePearsonCorr[1,2]
    
    #Put them together
    AllTog <- data.frame(cbind(Outcome_SpearmanCorr,Outcome_PearsonCorr))
    AllTog$Model <- paste(namefolders,linfile,sep="",paste("_",nrFile,sep=""))
    AllTog <- subset(AllTog,select=c("Model","Outcome_SpearmanCorr","Outcome_PearsonCorr"))
    
    #Save
    write.csv(AllTog, file=paste(namefolders,linfile,sep="",paste("_",nrFile,sep="",paste("_OutTreatEndogeneityCorrelationWElection.csv",sep=""))),row.names=FALSE)
    
    result <- AllTog
    
  }
}



